sitemap.xml example

工作流概述

这是一个包含21个节点的复杂工作流,主要用于自动化处理各种任务。

工作流源代码

下载
{
  "id": "7i2RqqCYaKHUt4n3",
  "meta": {
    "instanceId": "fb924c73af8f703905bc09c9ee8076f48c17b596ed05b18c0ff86915ef8a7c4a"
  },
  "name": "Google Site Index - sitemap.xml example",
  "tags": [],
  "nodes": [
    {
      "id": "4da50fbf-7707-42ea-badc-6748c4ee30db",
      "name": "When clicking \"Test workflow\"",
      "type": "n8n-nodes-base.manualTrigger",
      "position": [
        -927,
        472
      ],
      "parameters": {},
      "typeVersion": 1
    },
    {
      "id": "9e5bd6c8-a056-462b-b746-60d86bfbe398",
      "name": "Split Out",
      "type": "n8n-nodes-base.splitOut",
      "position": [
        480,
        360
      ],
      "parameters": {
        "options": {},
        "fieldToSplitOut": "urlset.url"
      },
      "typeVersion": 1
    },
    {
      "id": "0d4acf98-31c5-4a0d-bb29-c1d045c0705c",
      "name": "Check status",
      "type": "n8n-nodes-base.httpRequest",
      "onError": "continueErrorOutput",
      "position": [
        1540,
        400
      ],
      "parameters": {
        "url": "=https://indexing.googleapis.com/v3/urlNotifications/metadata?url={{ encodeURIComponent($json.loc) }}",
        "options": {
          "response": {
            "response": {
              "fullResponse": true
            }
          }
        },
        "authentication": "predefinedCredentialType",
        "nodeCredentialType": "googleOAuth2Api"
      },
      "credentials": {
        "googleOAuth2Api": {
          "id": "K8Cz9Dy3TR68udv2",
          "name": "Google account"
        }
      },
      "retryOnFail": false,
      "typeVersion": 4.1
    },
    {
      "id": "eee0eba6-3aa3-4841-9d48-8407db1212e2",
      "name": "Loop Over Items",
      "type": "n8n-nodes-base.splitInBatches",
      "position": [
        1340,
        360
      ],
      "parameters": {
        "options": {}
      },
      "typeVersion": 3
    },
    {
      "id": "47745d33-8358-45a8-a67d-60f9f0574bae",
      "name": "Wait",
      "type": "n8n-nodes-base.wait",
      "position": [
        2080,
        400
      ],
      "webhookId": "44364241-e54b-4b44-aaa1-0d8121a7f497",
      "parameters": {
        "unit": "seconds",
        "amount": "={{ Math.min(1.5,0.3+3*Math.random()).toFixed(2) }}"
      },
      "typeVersion": 1
    },
    {
      "id": "9f1bf72e-8ecd-4239-b96f-b77be4c86b18",
      "name": "URL Updated",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        1840,
        400
      ],
      "parameters": {
        "url": "=https://indexing.googleapis.com/v3/urlNotifications:publish",
        "method": "POST",
        "options": {},
        "sendBody": true,
        "authentication": "predefinedCredentialType",
        "bodyParameters": {
          "parameters": [
            {
              "name": "url",
              "value": "={{ $('Loop Over Items').item.json.loc }}"
            },
            {
              "name": "type",
              "value": "URL_UPDATED"
            }
          ]
        },
        "nodeCredentialType": "googleOAuth2Api"
      },
      "credentials": {
        "googleOAuth2Api": {
          "id": "K8Cz9Dy3TR68udv2",
          "name": "Google account"
        }
      },
      "typeVersion": 4.1
    },
    {
      "id": "629eaf34-ef3c-4e9c-9537-69a03310dd9c",
      "name": "Schedule Trigger",
      "type": "n8n-nodes-base.scheduleTrigger",
      "position": [
        -927,
        272
      ],
      "parameters": {
        "rule": {
          "interval": [
            {
              "triggerAtHour": 2,
              "triggerAtMinute": 5
            }
          ]
        }
      },
      "typeVersion": 1.1
    },
    {
      "id": "2f95065c-fdc9-4773-87b0-37007ae4f9a5",
      "name": "Sticky Note1",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -87,
        192
      ],
      "parameters": {
        "width": 851.3475816949383,
        "height": 340.39627039627067,
        "content": "## Collect list of URLs

This part extracts all pages from all sitemaps and sorts by the last modified date `lastmod` (from newest to oldest)"
      },
      "typeVersion": 1
    },
    {
      "id": "33798da1-4fd3-43dc-9ff4-753bae798535",
      "name": "is new?",
      "type": "n8n-nodes-base.if",
      "position": [
        1700,
        280
      ],
      "parameters": {
        "options": {
          "looseTypeValidation": true
        },
        "conditions": {
          "options": {
            "leftValue": "",
            "caseSensitive": true,
            "typeValidation": "loose"
          },
          "combinator": "and",
          "conditions": [
            {
              "id": "c8566fc4-57cf-4272-841e-014bb354a37d",
              "operator": {
                "type": "dateTime",
                "operation": "after"
              },
              "leftValue": "={{ $('Loop Over Items').item.json.lastmod }}",
              "rightValue": "={{ $json.body.latestUpdate.notifyTime }}"
            }
          ]
        }
      },
      "typeVersion": 2
    },
    {
      "id": "b5d538ec-d7bc-40ac-9b9e-e5ead9378387",
      "name": "Sticky Note2",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        1500,
        121.07782938758908
      ],
      "parameters": {
        "width": 504.2424242424241,
        "height": 431.1089918072487,
        "content": "## Check URL metadata and update, if:
* Google returns error (404 error means that this URL was not previously added)
* Date of article update is after the date of last request to re-index"
      },
      "typeVersion": 1
    },
    {
      "id": "2cc0b088-b09f-4dc2-8027-9e0ff442576b",
      "name": "Sticky Note3",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -640,
        196.4335593220339
      ],
      "parameters": {
        "width": 515.8058994999984,
        "height": 335.72259887005646,
        "content": "## Get sitemap.xml
Various CMS systems often have multiple sitemaps for different content (posts, tags, pages etc). Need to fetch all sitemaps first and then extract all pages from all sitemaps.
### Remember to update the real sitemap URL!"
      },
      "typeVersion": 1
    },
    {
      "id": "d8dc3b65-0d47-49a7-9042-33dbc5a2e245",
      "name": "Sticky Note",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -662.5490981963931,
        120.2098305084748
      ],
      "parameters": {
        "color": 6,
        "width": 1458.468937875752,
        "height": 453.3292476478371,
        "content": "## Feel free to adapt this part depending on your website CMS
"
      },
      "typeVersion": 1
    },
    {
      "id": "a763f582-500c-4cc8-b780-672ebc3d0845",
      "name": "Get content-specific sitemaps",
      "type": "n8n-nodes-base.splitOut",
      "position": [
        -260,
        360
      ],
      "parameters": {
        "options": {},
        "fieldToSplitOut": "sitemapindex.sitemap"
      },
      "typeVersion": 1
    },
    {
      "id": "e7aa9728-eb9b-454d-a710-561d76841d7a",
      "name": "Convert sitemap to JSON",
      "type": "n8n-nodes-base.xml",
      "position": [
        -440,
        360
      ],
      "parameters": {
        "options": {}
      },
      "typeVersion": 1
    },
    {
      "id": "496366d7-0d4e-401c-a375-8ca8882e8a32",
      "name": "Force urlset.url to array",
      "type": "n8n-nodes-base.set",
      "position": [
        320,
        360
      ],
      "parameters": {
        "options": {},
        "assignments": {
          "assignments": [
            {
              "id": "8d16114b-1d1a-4522-a550-6c799a44538a",
              "name": "=urlset.url",
              "type": "array",
              "value": "={{ $json.urlset.url[0] ? $json.urlset.url : [$json.urlset.url] }}"
            }
          ]
        }
      },
      "typeVersion": 3.3
    },
    {
      "id": "3a8e00a6-2fa4-4903-943d-890e0078181e",
      "name": "Sticky Note4",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        820,
        120
      ],
      "parameters": {
        "color": 3,
        "width": 459.2224448897797,
        "height": 451.39712985292624,
        "content": "## Update the `lastmod` and `loc` fields
These are pre-defined fields according to [the XML schema for the Sitemap protocol](https://www.sitemaps.org/protocol.html).

If your CMS system has different field names, please rename them here:
* the last modified field `lastmod`
* URL of the page in `loc` field"
      },
      "typeVersion": 1
    },
    {
      "id": "9d841026-ede6-4396-a67b-e1787ffe9a17",
      "name": "Assign mandatiry sitemap fields",
      "type": "n8n-nodes-base.set",
      "position": [
        1000,
        360
      ],
      "parameters": {
        "options": {},
        "assignments": {
          "assignments": [
            {
              "id": "bb0e1337-6fda-4a22-9963-d0b1271fc2a6",
              "name": "lastmod",
              "type": "string",
              "value": "={{ $json.lastmod }}"
            },
            {
              "id": "e7517c23-f989-4d75-9078-d82c75e51c65",
              "name": "loc",
              "type": "string",
              "value": "={{ $json.loc }}"
            }
          ]
        }
      },
      "typeVersion": 3.3
    },
    {
      "id": "99787654-f554-4650-afc0-c4fa65392c2b",
      "name": "convert page data to JSON",
      "type": "n8n-nodes-base.xml",
      "position": [
        120,
        360
      ],
      "parameters": {
        "options": {
          "explicitArray": false
        }
      },
      "typeVersion": 1
    },
    {
      "id": "f5cc1725-955c-4eb2-a66f-93153ebf35d1",
      "name": "Get sitemap.xml",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        -620,
        360
      ],
      "parameters": {
        "url": "https://wordpress.org/sitemap.xml",
        "options": {}
      },
      "typeVersion": 4.1
    },
    {
      "id": "789076f0-4aa1-469b-afac-af717c0b03c3",
      "name": "Get content of each sitemap",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        -60,
        360
      ],
      "parameters": {
        "url": "={{ $json.loc }}",
        "options": {
          "batching": {
            "batch": {
              "batchSize": 1,
              "batchInterval": 150
            }
          }
        }
      },
      "typeVersion": 4.1
    },
    {
      "id": "b0bdc6d6-1306-4c0c-bec2-7e59d587db69",
      "name": "Sort",
      "type": "n8n-nodes-base.sort",
      "position": [
        640,
        360
      ],
      "parameters": {
        "options": {},
        "sortFieldsUi": {
          "sortField": [
            {
              "order": "descending",
              "fieldName": "lastmod"
            }
          ]
        }
      },
      "typeVersion": 1
    }
  ],
  "active": false,
  "pinData": {},
  "settings": {
    "callerPolicy": "workflowsFromSameOwner",
    "errorWorkflow": "6",
    "executionOrder": "v1",
    "saveManualExecutions": true,
    "saveDataSuccessExecution": "all"
  },
  "versionId": "5c21ebb6-67df-4bde-9aea-6cc9a7621fc0",
  "connections": {
    "Sort": {
      "main": [
        [
          {
            "node": "Assign mandatiry sitemap fields",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Wait": {
      "main": [
        [
          {
            "node": "Loop Over Items",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "is new?": {
      "main": [
        [
          {
            "node": "URL Updated",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "Wait",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Split Out": {
      "main": [
        [
          {
            "node": "Sort",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "URL Updated": {
      "main": [
        [
          {
            "node": "Wait",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Check status": {
      "main": [
        [
          {
            "node": "is new?",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "URL Updated",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Get sitemap.xml": {
      "main": [
        [
          {
            "node": "Convert sitemap to JSON",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Loop Over Items": {
      "main": [
        [],
        [
          {
            "node": "Check status",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Schedule Trigger": {
      "main": [
        [
          {
            "node": "Get sitemap.xml",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Convert sitemap to JSON": {
      "main": [
        [
          {
            "node": "Get content-specific sitemaps",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Force urlset.url to array": {
      "main": [
        [
          {
            "node": "Split Out",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "convert page data to JSON": {
      "main": [
        [
          {
            "node": "Force urlset.url to array",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Get content of each sitemap": {
      "main": [
        [
          {
            "node": "convert page data to JSON",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Get content-specific sitemaps": {
      "main": [
        [
          {
            "node": "Get content of each sitemap",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "When clicking \"Test workflow\"": {
      "main": [
        [
          {
            "node": "Get sitemap.xml",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Assign mandatiry sitemap fields": {
      "main": [
        [
          {
            "node": "Loop Over Items",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  }
}

功能特点

  • 自动检测新邮件
  • AI智能内容分析
  • 自定义分类规则
  • 批量处理能力
  • 详细的处理日志

技术分析

节点类型及作用

  • Manualtrigger
  • Splitout
  • Httprequest
  • Splitinbatches
  • Wait

复杂度评估

配置难度:
★★★★☆
维护难度:
★★☆☆☆
扩展性:
★★★★☆

实施指南

前置条件

  • 有效的Gmail账户
  • n8n平台访问权限
  • Google API凭证
  • AI分类服务订阅

配置步骤

  1. 在n8n中导入工作流JSON文件
  2. 配置Gmail节点的认证信息
  3. 设置AI分类器的API密钥
  4. 自定义分类规则和标签映射
  5. 测试工作流执行
  6. 配置定时触发器(可选)

关键参数

参数名称 默认值 说明
maxEmails 50 单次处理的最大邮件数量
confidenceThreshold 0.8 分类置信度阈值
autoLabel true 是否自动添加标签

最佳实践

优化建议

  • 定期更新AI分类模型以提高准确性
  • 根据邮件量调整处理批次大小
  • 设置合理的分类置信度阈值
  • 定期清理过期的分类规则

安全注意事项

  • 妥善保管API密钥和认证信息
  • 限制工作流的访问权限
  • 定期审查处理日志
  • 启用双因素认证保护Gmail账户

性能优化

  • 使用增量处理减少重复工作
  • 缓存频繁访问的数据
  • 并行处理多个邮件分类任务
  • 监控系统资源使用情况

故障排除

常见问题

邮件未被正确分类

检查AI分类器的置信度阈值设置,适当降低阈值或更新训练数据。

Gmail认证失败

确认Google API凭证有效且具有正确的权限范围,重新进行OAuth授权。

调试技巧

  • 启用详细日志记录查看每个步骤的执行情况
  • 使用测试邮件验证分类逻辑
  • 检查网络连接和API服务状态
  • 逐步执行工作流定位问题节点

错误处理

工作流包含以下错误处理机制:

  • 网络超时自动重试(最多3次)
  • API错误记录和告警
  • 处理失败邮件的隔离机制
  • 异常情况下的回滚操作